function obj_eval = build_obj(param,fp,col_long,data,data_mom)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STEP 1: Unpack parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% A) Preferences

mean_alpha1 = param(fp.par.alpha1);
mean_alpha2 = param(fp.par.alpha2);
mean_alpha3 = param(fp.par.alpha3);
mean_lambda1 = param(fp.par.lambda1);
mean_lambda2 = param(fp.par.lambda2);

% Assume altruism parameter is exogenously fixed throughout the estimation
phi = fp.varphi;
pref_draw = [mean_alpha1; mean_alpha2; mean_alpha3; mean_lambda1; mean_lambda2];

% Parental preferences
temp1 = exp(pref_draw(1));
temp2 = exp(pref_draw(2));
temp3 = exp(pref_draw(3));
temp4 = 1;
sum_temp = temp1+temp2+temp3+temp4;
vp.alpha1 = temp1 / sum_temp;
vp.alpha2 = temp2 / sum_temp;
vp.alpha3 = temp3 / sum_temp;
vp.alpha4 = temp4 / sum_temp;

% Child preferences
temp1 = exp(pref_draw(4));
temp2 = exp(pref_draw(5));
temp3 = 1;
sum_temp = temp1+temp2+temp3;
vp.lambda1 = temp1 / sum_temp;
vp.lambda2 = temp2 / sum_temp;
vp.lambda3 = temp3 / sum_temp;

% Altruistic parental weights: u_p_tilde = (1-phi)*u_p + phi*u_c
% Preference tilde parameters for parents after substituting out
vp.alpha1_tilde = (1-phi)*vp.alpha1;
vp.alpha2_tilde = (1-phi)*vp.alpha2;
vp.alpha3_tilde = (1-phi)*vp.alpha3;
vp.alpha4_tilde = (1-phi)*vp.alpha4 + phi*vp.lambda3;
vp.alpha5_tilde = phi*vp.lambda1;
vp.alpha6_tilde = phi*vp.lambda2;

% CCT Utility cost draws
cost_mean = exp(param(fp.par.cost_mean)); % mean has to be positive

if fp.do_boot_se == 0
    seednr = 12793; % fix seed number if not bootstrapping
elseif fp.do_boot_se == 1 % during bootstrap rounds, vary the seed in every round so we draw different costs
    seednr = fp.zeta_cost_seeds(fp.b_value);
end

% Generate set of heterogeneous monitoring costs (fixed over time within households)
rand('state',seednr); % fix seed for random draws
vp.cost_draws = exprnd(cost_mean,fp.N,fp.R);

%% B) Cognitive skill formation technology

% Unpack cognitive human capital parameters
% mom time tau1
delta1_intercept = param(fp.par.delta1_intercept);
delta1_slope = param(fp.par.delta1_slope);
delta1_educ = param(fp.par.delta1_educ);
% dad time tau2
delta2_intercept = param(fp.par.delta2_intercept);
delta2_slope = param(fp.par.delta2_slope);
delta2_educ = param(fp.par.delta2_educ);
% child self-investment time tauc
delta3_intercept = param(fp.par.delta3_intercept);
delta3_slope = param(fp.par.delta3_slope);
% child expenditures e
delta4_intercept = param(fp.par.delta4_intercept);
delta4_slope = param(fp.par.delta4_slope);
% lagged child skills k
delta5_intercept = param(fp.par.delta5_intercept);
delta5_slope = param(fp.par.delta5_slope);

% Unpack TFP parameters
temp = exp(param(fp.par.L0));
L0 = fp.L0_min + (fp.L0_max - fp.L0_min) * temp/(1+temp);
temp = exp(param(fp.par.L));
L = fp.L_min + (fp.L_max - fp.L_min) * temp/(1+temp);
temp = exp(param(fp.par.k));
k = fp.k_min + (fp.k_max - fp.k_min) * temp/(1+temp);
temp = exp(param(fp.par.t0));
t0 = fp.t0_min + (fp.t0_max - fp.t0_min) * temp/(1+temp);

% allow time productivity to vary with household observables
vp.delta1mat = NaN(fp.M,fp.N);
vp.delta2mat = NaN(fp.M,fp.N);
for q = 1:fp.N
    data_here = data(1+(fp.M+1)*(q-1):(fp.M+1)*q, : );
    if fp.compstat_SES_homtech == 1
        % In comparative exercise, shut down heterogeneity in technology parameters by SES
        mom_educ = fp.s1_median; % fix mom education to the sample median
        dad_educ = fp.s2_median; % fix dad education to the sample median
    elseif fp.compstat_SES_homtech == 0
        mom_educ = data_here(1,col_long.mom_educ);
        dad_educ = data_here(1,col_long.dad_educ);
    end
    for t = 1:fp.M
        vp.delta1mat(t,q) = exp(delta1_intercept + delta1_slope*t + delta1_educ*mom_educ);
        vp.delta2mat(t,q) = exp(delta2_intercept + delta2_slope*t + delta2_educ*dad_educ);
    end % t
end % q

vp.R = NaN(fp.M,1);
vp.delta3 = NaN(fp.M,1);
vp.delta4 = NaN(fp.M,1);
vp.delta5 = NaN(fp.M,1);
for t = 1:fp.M
    % Define TFP parameters using the logistic specification:
    vp.R(t) = L0 + (L-L0)/(1+exp(-k*(t-t0)));

    % Remaining cognitive skill parameters:
    vp.delta3(t,1) = exp(delta3_intercept + delta3_slope*t);
    vp.delta4(t,1) = exp(delta4_intercept + delta4_slope*t);
    vp.delta5(t,1) = exp(delta5_intercept + delta5_slope*t);
end

%% C) Child discount factor process

% Specify child stock of patience (n_t) as discrete process with parametrized Markov matrix
Z = length(fp.betac_vec); % number of grid points, should be Z = 3
markov_trans = zeros(Z,Z,2,fp.M); % rows = current state, columns = future state, 3rd dimension = CCT=0/1, 4th dimension =  child ages
% each element captures transition probability from n_t to n_t+1
% conditional on CCT_t = 0 or 1 and child age t

% Unpack parameters for case where Z == 3 grid points
if Z == 3
    % Specification uses 6 parameters, imposing symmetry
    % Bottom and middle state: parametrize probability to go up one level
    b0_up = param(fp.par.b0_up); % bottom state, intercept
    b1_up = param(fp.par.b1_up); % bottom state, age slope
    b2_up = param(fp.par.b2_up); % bottom state, CCT slope
    b3_up = param(fp.par.b3_up); % bottom state, interaction age*cct
    % Top and middle state: parametrize probability to go down one level
    b0_down = param(fp.par.b0_down); % top state, intercept
    b1_down = param(fp.par.b1_down); % top state, age slope
    % Impose symmetry on the two last ones
    b2_down = -b2_up; % top state, CCT slope
    b3_down = -b3_up; % top state, CCT slope

    tvec = 1:fp.M;

    for jj = 1:2
        CCT = (jj-1); % CCT = 0 (first slice) or 1 (second slice)
        % Bottom state:
        Dplus = exp(b0_up+b1_up*tvec+b2_up*CCT+b3_up*CCT*tvec);
        markov_trans(1,2,jj,:)= Dplus./(1+Dplus); % 1 to 2
        markov_trans(1,1,jj,:)= 1-markov_trans(1,2,jj,:); % 1 to 1
        % Top state:
        Dminus = exp(b0_down+b1_down*tvec+b2_down*CCT+b3_down*CCT*tvec);
        markov_trans(3,2,jj,:)= Dminus./(1+Dminus); % 3 to 2
        markov_trans(3,3,jj,:)= 1-markov_trans(3,2,jj,:); % 3 to 3
        % Middle state:
        markov_trans(2,3,jj,:)= Dplus./(1+Dplus+Dminus); % 2 to 3
        markov_trans(2,1,jj,:)= Dminus./(1+Dplus+Dminus); % 2 to 1
        markov_trans(2,2,jj,:)= 1./(1+Dplus+Dminus); % 2 to 2
    end

    vp.markov_trans = markov_trans;

    % Calculate expectation E_t(n_t+1 | n_t, CCT_t) = 3x2 for every period t = 1:M (note matrix index = t, not t+1)
    vp.expect_n_tp1 = NaN(Z,2,fp.M);
    for t = 1:fp.M
        vp.expect_n_tp1(:,1,t) = markov_trans(:,:,1,t)*fp.ngrid; % for CCT = 0, for all n_t = 1/2/3
        vp.expect_n_tp1(:,2,t) = markov_trans(:,:,2,t)*fp.ngrid; % for CCT = 1, for all n_t = 1/2/3
    end

    % Calculate expectation E_t(beta_t+1 | beta_t, CCT_t) = 3x2 for every period t = 1:M (note matrix index = t, not t+1)
    vp.expect_betac_tp1 = NaN(Z,2,fp.M);
    for t = 1:(fp.M)
        vp.expect_betac_tp1(:,1,t) = markov_trans(:,:,1,t)*fp.betac_vec; % for CCT = 0, for all beta_ct = 1/2/3
        vp.expect_betac_tp1(:,2,t) = markov_trans(:,:,2,t)*fp.betac_vec; % for CCT = 1, for all beta_ct = 1/2/3
    end

    % Next, specify initial conditions probabilities

    nu10 = param(fp.par.nu10); % bottom state, intercept
    nu11 = param(fp.par.nu11); % bottom state, age slope
    nu12 = param(fp.par.nu12); % bottom state, educ slope
    nu20 = param(fp.par.nu20); % middle state, intercept
    nu21 = param(fp.par.nu21); % middle state, age slope
    nu22 = param(fp.par.nu22); % middle state, educ slope

    agevec_init = 3:11; % vector of child ages in 1997
    educvec = 6:17; % range of father years of schooling
    betac_init_prob = NaN(length(agevec_init),length(educvec),Z); % Z = 3 layers
    for ii = 1:length(agevec_init) % possible initial ages in 1997 CDS
        for jj = 1:length(educvec)
            tmp1 = exp(nu10+nu11*agevec_init(ii)+nu12*educvec(jj)); % logit term for prob(beta_init = beta_low)
            tmp2 = exp(nu20+nu21*agevec_init(ii)+nu22*educvec(jj)); % logit term for prob(beta_init = beta_middle)

            tmpL=tmp1/(1+tmp1+tmp2);
            tmpM=tmp2/(1+tmp1+tmp2);
            tmpH=1-tmpL-tmpM;

            betac_init_prob(ii,jj,1)=tmpL;
            betac_init_prob(ii,jj,2)=tmpM;
            betac_init_prob(ii,jj,3)=tmpH;
        end
    end

    vp.betac_init_draws = NaN(fp.N,fp.R,2); % slice 1 saves the randomly drawn index Z (1/2/3) and slice 2 saves the actual beta_c value

    for i = 1:fp.N
        % mother/child i's data
        data_here = data(1+(fp.M+1)*(i-1):(fp.M+1)*i, : );
        % Load exogenous household characteristics
        age_init = data_here(1,col_long.child_age97); % constant over time
        if fp.compstat_SES_homprefs == 1
            % In comparative exercise, shut down heterogeneity in (initial) discount factor distribution by SES
            dad_educ = fp.s2_median;
        elseif fp.compstat_SES_homprefs == 0
            dad_educ = data_here(1,col_long.dad_educ);
        end

        % get matching indices
        tmpii = find(agevec_init==age_init);
        tmpjj = find(educvec == dad_educ);
        tmp_probs = betac_init_prob(tmpii,tmpjj,:);
        for r = 1:fp.R
            tmprnd = fp.betac_init_unifdraws(i,r);
            vp.betac_init_draws(i,r,1) = 1*(tmprnd <= tmp_probs(1)) + 2*(tmprnd > tmp_probs(1) & tmprnd <= sum(tmp_probs(1:2))) + 3*(tmprnd > sum(tmp_probs(1:2))); % verify this works for case with 3 grid points
            vp.betac_init_draws(i,r,2) = fp.betac_vec(vp.betac_init_draws(i,r,1));
        end
    end
else
    error('Markov matrix not yet specified for this case!')
end

%% D) Wage Offers

%intercept
vp.beta0_mom = param(fp.par.beta0_mom);
vp.beta0_dad = param(fp.par.beta0_dad);
%return to experience (age)
vp.beta1_mom = exp(param(fp.par.beta1_mom));
vp.beta1_dad = exp(param(fp.par.beta1_dad));
%return to school
vp.beta2_mom = exp(param(fp.par.beta2_mom));
vp.beta2_dad = exp(param(fp.par.beta2_dad));
% standard deviations of wage shocks
sigma_mom = exp(param(fp.par.sigma_mom));
sigma_dad = exp(param(fp.par.sigma_dad));
% correlation in epsilon draws
temp = exp(param(fp.par.wage_corr));
temp1 = temp / (1+temp);
wage_corr = -0.95 + (0.95 - (-0.95)).* temp1;

var1 = sigma_mom^2;
var2 = sigma_dad^2;
cov_wages = sigma_mom * sigma_dad * wage_corr;
cov_mat = [var1 cov_wages; cov_wages var2];
[chol_cov_mat_wages,p] = chol(cov_mat);
if p > 0
    disp('Return to solver: Covariance matrix of wages is not PD...')
    obj_eval = 1e20;
    cov_mat;
    return;
end

% Generate a set of correctly correlated wage epsilons for mom and dad
vp.wage_list = NaN*ones(fp.N,fp.R,fp.M+1,2);
for i = 1:fp.N
    % mother/child i's data
    data_here = data(1+(fp.M+1)*(i-1):(fp.M+1)*i, : );
    % Load exogenous household characteristics
    if fp.compstat_SES_homwages == 1
        % In comparative staticsexercise, shut down heterogeneity in wages by SES
        mom_educ = fp.s1_median;
        dad_educ = fp.s2_median;
    elseif fp.compstat_SES_homwages == 0
        mom_educ = data_here(1,col_long.mom_educ);
        dad_educ = data_here(1,col_long.dad_educ);
    end
    if fp.compstat_age_homwages == 1
        % In comparative statics exercise, shut down heterogeneity in wages by parental age
        mom_age = fp.age1_median * ones(fp.M+1,1);
        dad_age = fp.age2_median * ones(fp.M+1,1);
    elseif fp.compstat_age_homwages == 0
        mom_age = data_here(:,col_long.mom_age); % vector of size (fp.M + 1) * 1
        dad_age = data_here(:,col_long.dad_age); % vector of size (fp.M + 1) * 1
    end

    for r = 1:fp.R
        for t = 0:fp.M % ages 0-16: shifted index! See sim_funct.m
            eps_draw = [0;0] + chol_cov_mat_wages * [fp.wage_draws(i,r,t+1,1); fp.wage_draws(i,r,t+1,2)]; % normal numbers with correct means (0;0) and covariances
            vp.wage_list(i,r,t+1,1) = exp( vp.beta0_mom + vp.beta1_mom * mom_age(t+1) + vp.beta2_mom * mom_educ + eps_draw(1)); % correct wage for the mom
            vp.wage_list(i,r,t+1,2) = exp( vp.beta0_dad + vp.beta1_dad * dad_age(t+1) + vp.beta2_dad * dad_educ + eps_draw(2)); % correct wage for the dad
        end % t
    end % r
end % i

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STEP 2: Simulate data given set of parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

sim_data = sim_choices(vp,fp,col_long,data);

% TRUNCATE/CENSOR SOME DATA HERE;
% Note that we followed this censoring procedure for the actual data:
% 1) if nonlabor income > 1000 or < 0, censor nonlabor income, wages and hours to NaN
% 2) if labor hours > 80, censor only that variable to NaN
% 3) if wage > 150, censor only that variable to NaN

% We will follow the exact same procedure, and moreover also replace every
% simulated data point by a NaN if the corresponding actual data point is
% missing (i.e. a NaN). This amounts to "overlapping" the NaNs in the
% simulated data with the NaNs from the original data set, for every
% simulation round r from 1 to R. Note that size(sim_data) = (fp.R*fp.N*(fp.M+1),fp.tot_cols)
% and size(data) = (fp.N *(fp.M+1),fp.nr_obs_vars)

% Note: we keep simulated child data for children aged 3 in 1997, in order to be able
% to fit wages, hours and nonlabor income moments at that age, since PSID has no observations for those variables in 1997

% Thus, as in DFW (2014), we retain (i.e. un-censor) some of the simulated data for when the child is aged 3
% in 1997 (i.e. replace the NaNs by the simulated data if child_age97 == 3,
% in order to be able to simulate certain variables (e.g. wages, hours, CCT choices) for ages 3 to 16 rather
% than 4 to 16.

% In particular, we don't censor variables 9 and 11 = w1 and w2, or variable 31 = CCT use
temp_nandata = zeros(size(data,1),fp.nr_obs_vars);
censor_cols = [1:8 10 12:30]; % all data columns except for 9, 11 and 31, where mom_wage, dad_wage and CCT are stored
temp_nandata(:,censor_cols) = isnan(data(:,censor_cols));

temp_sim_data_copy = sim_data;
sim_data = [];
for r = 1:fp.R
    rows_for_r = [((r-1)*(fp.N *(fp.M+1))+1) : (r * fp.N * (fp.M+1))]';
    temp_sim_data_r = temp_sim_data_copy(rows_for_r,:);
    % note: since child_age97 is a constant in all 17 rows, this does not censor ANY of this household's data!
    % For the purpose of making graphs of simulated results at all ages, we
    % un-censor the simulated (h1,h2) data for year 1997 (but not the other years)
    temp_nocensor = (temp_sim_data_r(:,col_long.child_age97)==temp_sim_data_r(:,col_long.age)) & (temp_sim_data_r(:,col_long.child_age97)==3); % 1 if age 3 in 1997, 0 otherwise
    temp_nocensor = repmat(temp_nocensor,1,fp.nr_obs_vars); % make it 30 columns wide now
    temp_censor = (temp_nandata == 1) & (temp_nocensor == 0);
    temp_sim_data_r(temp_censor) = NaN;
    sim_data = [sim_data; temp_sim_data_r];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STEP 3: Calculate Simulated Moments
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

temp = build_moments(sim_data,col_long,fp,0);
sim_mom = temp.moments_all;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STEP 4: Form Objective Function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if fp.estimate == 1
    diff = sim_mom - data_mom;
    obj_eval = diff'*fp.est_wt*diff;
else
    % Record all (properly transformed) parameter values
    % Note the order of the indices is a little messy after many previous
    % iterations of the model structure, adding/dropping parameters, etc.

    param_vector = NaN*ones(150,1);

    % 1. Preference parameters
    param_vector(1) = vp.alpha1;
    param_vector(2) = vp.alpha2;
    param_vector(3) = vp.alpha3;
    param_vector(4) = vp.alpha4;
    param_vector(5) = vp.lambda1;
    param_vector(6) = vp.lambda2;
    param_vector(7) = vp.lambda3;
    param_vector(8) = vp.alpha1_tilde;
    param_vector(9) = vp.alpha2_tilde;
    param_vector(10) = vp.alpha3_tilde;
    param_vector(11) = vp.alpha4_tilde;
    param_vector(12) = vp.alpha5_tilde;
    param_vector(13) = vp.alpha6_tilde;
    param_vector(39) = phi; % fixed altruism parameter
    param_vector(89) = cost_mean; % exponential distribution parameter of CCT utility cost

    % 2. Cognitive skill technology parameters
    % mother's time tau_1
    param_vector(40) = delta1_intercept;
    param_vector(41) = delta1_slope;
    param_vector(84) = delta1_educ;
    % father's time tau_2
    param_vector(42) = delta2_intercept;
    param_vector(43) = delta2_slope;
    param_vector(85) = delta2_educ; % tau2, dad_educ
    % child self-investment time tau_c
    param_vector(44) = delta3_intercept;
    param_vector(45) = delta3_slope;
    % child expenditures e
    param_vector(46) = delta4_intercept;
    param_vector(47) = delta4_slope;
    % lagged skills k
    param_vector(48) = delta5_intercept;
    param_vector(49) = delta5_slope;
    % TFP, for every age
    param_vector(90) = vp.R(3); %TFP at age 3
    param_vector(91) = vp.R(4); %TFP at age 4
    param_vector(92) = vp.R(5); %TFP at age 5
    param_vector(93) = vp.R(6); %TFP at age 6
    param_vector(94) = vp.R(7); %TFP at age 7
    param_vector(95) = vp.R(8); %TFP at age 8
    param_vector(96) = vp.R(9); %TFP at age 9
    param_vector(97) = vp.R(10); %TFP at age 10
    param_vector(98) = vp.R(11); %TFP at age 11
    param_vector(99) = vp.R(12); %TFP at age 12
    param_vector(100) = vp.R(13); %TFP at age 13
    param_vector(101) = vp.R(14); %TFP at age 14
    param_vector(102) = vp.R(15); %TFP at age 15
    param_vector(103) = vp.R(16); %TFP at age 16
    % Raw TFP parameters:
    param_vector(104) = L0;
    param_vector(105) = L;
    param_vector(106) = k;
    param_vector(107) = t0;

    % 3. Child discount factor process parameters
    % parameters of the discretized Markov process
    param_vector(108) = b0_up;
    param_vector(109) = b1_up;
    param_vector(110) = b2_up;
    param_vector(111) = b3_up;
    param_vector(112) = b0_down;
    param_vector(113) = b1_down;
    param_vector(114) = b2_down; % assumed symmetry
    param_vector(115) = b3_down; % assumed symmetry
    % initial conditions for beta_{c,t0}
    param_vector(116) = nu10;
    param_vector(117) = nu11;
    param_vector(118) = nu12;
    param_vector(119) = nu20;
    param_vector(120) = nu21;
    param_vector(121) = nu22;

    % 4. Wage process parameters
    param_vector(54) = vp.beta0_mom;
    param_vector(55) = vp.beta1_mom;
    param_vector(56) = vp.beta2_mom;
    param_vector(57) = sigma_mom;
    param_vector(58) = vp.beta0_dad;
    param_vector(59) = vp.beta1_dad;
    param_vector(60) = vp.beta2_dad;
    param_vector(61) = sigma_dad;
    param_vector(62) = wage_corr;

    % Save everything to obj_eval
    obj_eval.param_vector = param_vector; % needed for bootstrap SE
    obj_eval.sim_data = sim_data;
    obj_eval.betac_init = vp.betac_init_draws;
    obj_eval.markov_trans = markov_trans;
    obj_eval.sim_mom = sim_mom; % all moments
    diff = sim_mom - data_mom;
    obj_eval.obj_value = diff'*fp.est_wt*diff;
    temp = zeros(length(data_mom),1);
    for j = 1: length(data_mom)
        temp(j) = (sim_mom(j) - data_mom(j))^2 * fp.est_wt(j,j);
    end
    obj_eval.weight_perc = 100 * temp/sum(temp); % gives the percentage of obj_eval contributed by each moment
end

end % function